* This script contains the code to replicate Figure 7 in Ductor, L., Fafchamps, M., Goyal S. and M. van der Leij. Social Networks and Research Output. The Review of Economics and Statistics.
* Session setup: open the log, load the full-sample data set, configure Stata.
* A log left open by an earlier run that stopped before reaching log close
* would make log using fail with "log file already open", so close it first.
* capture suppresses the error raised when no log is currently open.
capture log close
log using Figure7.log, replace
use prodndnp_fullsample.dta, clear

set matsize 800
set more off

/*******OLS REGRESSIONS**************/
/* For each value 1-5 of qnt the loop below fits a baseline regression
   (Model 0) on group==1 observations and then a series of augmented
   regressions on that same estimation sample. Every model is scored by its
   root mean squared prediction error on group==2 observations, and the
   statistics are passed to outreg2 under the name quantiles (the data-editing
   section after the loop reads quantiles.txt).
   NOTE(review): the data-editing section selects rows of that table by fixed
   row number, so the order and labels of the addstat() entries below should
   not be changed without updating it. */

/*MODEL 0*/
forval i = 1(1)5{

* Baseline regression on group 1 for the current qnt value, restricted to t>5.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 if qnt==`i' & group==1 & t>5
* e flags the Model 0 estimation sample; all later models are fit on e==1.
gen e=1 if e(sample)
qui scalar n1=e(N)
estat ic
* rmseols and r2i1ols are stored after every regression but are not
* referenced again anywhere in this script.
scalar rmseols=e(rmse)
* Linear prediction for group 2 observations of the same qnt with observed lprodf3.
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   /*computing the R2 of the baseline model*/
* Squared prediction error; kept until the end of the iteration because every
* later model differences its own squared errors against it.
qui gen sqerrolsM0= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM0)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsM0=(rssols/nols)^0.5   /*RMSE out of sample*/

di n1 " Number of observations in the in-sample group"
di rmseo2olsM0 "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
* The local macro varlist is not defined anywhere in this file, so drop()
* receives an empty argument here and in every outreg2 call below.
* NOTE(review): no replace/append option is given; the calls appear to rely on
* outreg2 adding each model to the existing quantiles table - confirm.
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEM0,rmseo2olsM0, qnt,`i') excel
drop peols ybols

/*MODEL 1*/

* Model 0 regressors plus lcprod5, fit on the Model 0 estimation sample.
reg lprodf3 y7-y27 nopapers t7-t27 lcprod5 lcprodl5 if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM1= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM1)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsM1=(rssols/nols)^0.5   /*RMSE out of sample*/
* Per-observation difference in squared prediction error relative to Model 0.
gen diffsqerrlM1= sqerrolsM1-sqerrolsM0
* Constant-only regression of that difference with lag(0).
* NOTE(review): presumably a test that the mean difference is zero - confirm.
newey diffsqerrlM1 if ybols<., lag(0)
* Percentage reduction in out-of-sample RMSE relative to Model 0.
scalar diffrmseoutM1=((rmseo2olsM0-rmseo2olsM1)/rmseo2olsM0)*100

di n1 " Number of observations in the in-sample group"
di rmseo2olsM1 "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
di diffrmseoutM1 "RMSE % difference between Model 1 and Model 0"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEM1,rmseo2olsM1, RMSE % diff M1, diffrmseoutM1, qnt,`i') excel
drop sqerrolsM1 peols ybols diffsqerrlM1

/*****Model 2*****/
/* Each Model 2 variant below repeats the Model 1 steps, adding one network
   variable (plus gc5y for betweenness and closeness) to the Model 0
   regressors instead of lcprod5. */

/*Coauthors' productivity*/
* Adds lnetprod5y.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 lnetprod5y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2) 
qui gen sqerrolsM2net= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2net)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsnet=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2net= sqerrolsM2net-sqerrolsM0
newey diffsqerrlM2net if ybols<., lag(0)
scalar diffrmseoutM2net=((rmseo2olsM0-rmseo2olsnet)/rmseo2olsM0)*100

di n1 " Number of observations in the in-sample group"
di rmseo2olsnet "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEnet,rmseo2olsnet,RMSE % diff M2net, diffrmseoutM2net, qnt,`i') excel
drop sqerrolsM2net peols ybols diffsqerrlM2net


/*Coauthors' coauthors productivity*/
* Adds lnetprod25y.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 lnetprod25y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2net2= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2net2)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsnet2=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2net2= sqerrolsM2net2-sqerrolsM0
newey diffsqerrlM2net2 if ybols<., lag(0)
scalar diffrmseoutM2net2=((rmseo2olsM0-rmseo2olsnet2)/rmseo2olsM0)*100
di n1 " Number of observations in the in-sample group"
di rmseo2olsnet2 "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEnet2,rmseo2olsnet2,RMSE % diff M2net2, diffrmseoutM2net2, qnt,`i') excel
drop sqerrolsM2net2 diffsqerrlM2net2 peols ybols

/*Degree*/
* Adds degree5y.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 degree5y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)  
qui gen sqerrolsM2deg= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2deg)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsdeg=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2deg= sqerrolsM2deg-sqerrolsM0
newey diffsqerrlM2deg if ybols<., lag(0)
scalar diffrmseoutM2deg=((rmseo2olsM0-rmseo2olsdeg)/rmseo2olsM0)*100

di n1 " Number of observations in the in-sample group"
di rmseo2olsdeg "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEdeg,rmseo2olsdeg,RMSE % diff M2deg, diffrmseoutM2deg, qnt,`i') excel
drop sqerrolsM2deg diffsqerrlM2deg peols ybols


/*Degree 2*/
* Adds degree25y.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 degree25y  if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2deg2= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2deg2)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsdeg2=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2deg2= sqerrolsM2deg2-sqerrolsM0
newey diffsqerrlM2deg2 if ybols<., lag(0)
scalar diffrmseoutM2deg2=((rmseo2olsM0-rmseo2olsdeg2)/rmseo2olsM0)*100
di n1 " Number of observations in the in-sample group"
di rmseo2olsdeg2 "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEdeg2,rmseo2olsdeg2,RMSE % diff M2deg2, diffrmseoutM2deg2, qnt,`i') excel
drop sqerrolsM2deg2 diffsqerrlM2deg2 peols ybols

/* Giant component*/
* Adds gc5y. Its statistics are written to the table but are among the rows
* dropped by the data-editing section after the loop.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 gc5y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2gc= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2gc)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsgc=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2gc= sqerrolsM2gc-sqerrolsM0
newey diffsqerrlM2gc if ybols<., lag(0)
scalar diffrmseoutM2gc=((rmseo2olsM0-rmseo2olsgc)/rmseo2olsM0)*100
di n1 " Number of observations in the in-sample group"
di rmseo2olsgc "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEgc,rmseo2olsgc,RMSE % diff M2gc, diffrmseoutM2gc, qnt,`i') excel
drop sqerrolsM2gc diffsqerrlM2gc peols ybols

/* Betweenness*/
* Adds gc5y and lbet5y; the comparison is still made against Model 0.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 gc5y lbet5y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2bet= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2bet)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsbet=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2bet= sqerrolsM2bet-sqerrolsM0
newey diffsqerrlM2bet if ybols<., lag(0)
scalar diffrmseoutM2bet=((rmseo2olsM0-rmseo2olsbet)/rmseo2olsM0)*100

di n1 " Number of observations in the in-sample group"
di rmseo2olsbet "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEbet,rmseo2olsbet,RMSE % diff M2bet, diffrmseoutM2bet, qnt,`i') excel
drop sqerrolsM2bet diffsqerrlM2bet peols ybols

/*Closeness*/
* Adds gc5y and lclos5y; the comparison is still made against Model 0.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 gc5y lclos5y if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2cl= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2cl)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olscl=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2cl= sqerrolsM2cl-sqerrolsM0
newey diffsqerrlM2cl if ybols<., lag(0)
scalar diffrmseoutM2cl=((rmseo2olsM0-rmseo2olscl)/rmseo2olsM0)*100

di n1 " Number of observations in the in-sample group"
di rmseo2olscl "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEcl,rmseo2olscl,RMSE % diff M2cl, diffrmseoutM2cl, qnt,`i') excel
drop sqerrolsM2cl diffsqerrlM2cl peols ybols

/*Coauthors top 1 dummy variable*/
* Adds neiq1fs5y.
reg lprodf3 y7-y27 nopapers t7-t27 lcprodl5 neiq1fs5y  if e==1
qui scalar n1=e(N)
estat ic
scalar rmseols=e(rmse)
predict ybols if group==2 & lprodf3<. & qnt==`i' & t>5, xb 
scalar r2i1ols= e(r2)   
qui gen sqerrolsM2nei= (lprodf3-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM2nei)if ybols<.  /*RSS of the predicted model*/
quietly sum peols    /*summarizing the RSS*/
quietly scalar rssols=r(max)  /*obtaining the total RSS*/
quietly scalar nols=r(N)    /*number of observations in group 2*/
scalar rmseo2olsnei=(rssols/nols)^0.5   /*RMSE out of sample*/
gen diffsqerrlM2nei= sqerrolsM2nei-sqerrolsM0
newey diffsqerrlM2nei if ybols<., lag(0)
scalar diffrmseoutM2nei=((rmseo2olsM0-rmseo2olsnei)/rmseo2olsM0)*100
di n1 " Number of observations in the in-sample group"
di rmseo2olsnei "  Pooled OLS out-of-sample RMSE"
di nols " Number of observations in the out-of-sample group"
outreg2 using quantiles, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, RMSEnei,rmseo2olsnei,RMSE % diff M2nei, diffrmseoutM2nei, qnt,`i') excel
drop sqerrolsM2nei diffsqerrlM2nei peols ybols
* Remove the sample flag and the Model 0 squared errors so that the next
* iteration can generate them again.
drop e sqerrolsM0
}

log close

/* Data editing: reshape the outreg2 text table into one row of statistics per qnt value */
insheet using "quantiles.txt", clear
gen n = _n

* Keep rows 49-72 of the imported table, except rows 50, 59, 60, 63 and 64
* (giant component and closeness centrality are not reported in Figure 7).
drop if n < 49 | n > 72 | inlist(n, 50, 59, 60, 63, 64)

* Long format: one record per (statistic, table column) pair, with the value
* converted from text to a number after commas are stripped.
rename v1 variable
reshape long v, i(variable)
encode variable, gen(varlabel)
destring v, generate(v2) ignore(,)
drop v n variable
rename v2 v

* Wide format: one record per table column, one variable per statistic.
* NOTE(review): the name list below assumes encode numbered the 19 remaining
* statistics in the order implied by the labels, with number 19 holding qnt.
reshape wide v, i(_j) j(varlabel)

* Within each qnt value, spread the maximum of every statistic across all of
* its records and store it directly under its final name.
local stat_names obs rmsediffm1m0 rmsediffbetm0 rmsediffdegm0 rmsediffdeg2m0 rmsedifftop1m0 rmsediffnetm0 rmsediffnet2m0 rmsem0 rmsem1 rmsem2bet rmsem2deg rmsem2deg2 rmsem2top1 rmsem2net rmsem2net2 nin nos
forvalues k = 1/18 {
    local target : word `k' of `stat_names'
    bysort v19: egen `target' = max(v`k')
    drop v`k'
}
rename v19 qnt

* Variable labels; the graphs below use blabel(name).
label variable obs            "Observations"
label variable rmsediffm1m0   "Model 1"
label variable rmsediffbetm0  "Betweenness"
label variable rmsediffdegm0  "Degree"
label variable rmsediffdeg2m0 "Degree of order 2"
label variable rmsedifftop1m0 "Top 1%"
label variable rmsediffnetm0  "Coauthors' productivity"
label variable rmsediffnet2m0 "Coauthors' coauthors prod."
label variable rmsem0         "RMSEM0"
label variable rmsem1         "RMSEM1"
label variable rmsem2bet      "RMSE Betweenness"
label variable rmsem2deg      "RMSE Degree"
label variable rmsem2deg2     "RMSE Degree 2"
label variable rmsem2top1     "RMSE Working with 1% top author"
label variable rmsem2net      "RMSE Coauthors' productivity"
label variable rmsem2net2     "RMSE Coauthors' coauthors productivity"
label variable nin            "n_in"
label variable nos            "n_os"
label variable qnt            "qnt"

* With the column index gone, records of the same qnt value are identical and
* collapse to a single row.
drop _j
duplicates drop
drop obs
save quantiles, replace

/* Plots in Figure 7: one horizontal bar chart per tier (qnt 1 to 5), each exported as EPS */
* Bars, in display order.
local bars rmsediffbetm0 rmsediffnetm0 rmsediffnet2m0 rmsediffdegm0 rmsediffdeg2m0 rmsedifftop1m0 rmsediffm1m0
* Percentile range shown in each title and upper limit of each value axis, by tier.
local ranges >99% 95-98% 90-94% 80-89% 50-79%
local ytops 4.5 2 2 2 2

forvalues k = 1/5 {
    local rng  : word `k' of `ranges'
    local ytop : word `k' of `ytops'
    graph hbar (asis) `bars' if qnt==`k', bargap(20) blabel(name) ///
        ylabel(0(0.5)`ytop') title(Tier `k' (`rng')) legend(off) name(Tier`k', replace) ///
        graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
    graph export "Tier`k'.eps", as(eps) preview(off) replace
}




